exp <- str_extract(getwd(), 'exp\\d[a-z]?')
d <- read.csv(paste0(here::here(), '/experiments/analysis/', exp, '/data/', exp, '_main_raw.csv'))
N <- length(unique(d$subject))
head(d)
Initial sample size is 41.
getwd()
## [1] "/home/dave/Dropbox (Lehigh University)/post_doc/professional/practical_new_world/gaita/experiments/analysis/exp3/scripts/preprocessing"
(mostly for adjusting after pilot)
d %>%
group_by(subject) %>%
summarize(runtime = max(phase_runtime_mins)) %>%
ggplot(aes(x = reorder(subject, runtime), y = runtime)) +
geom_bar(stat = 'identity') +
geom_hline(yintercept = 30, linetype = 'dashed') +
labs(
x = 'Subject',
y = 'Main Phase Runtime (mins)'
) +
theme_bw() +
theme(axis.text.x = element_blank())
Going to take everyone under an hour and average as the completion time.
runtimes <- d %>%
group_by(subject) %>%
summarize(runtime = max(phase_runtime_mins))
avg_runtime <- mean(runtimes[runtimes$runtime < 60,]$runtime)
desired_trials <- function(avg_runtime, n_trials = 308) {
mins_per_trial <- avg_runtime / n_trials
return(30 / mins_per_trial)
}
print(paste0('Target trials to be 30 min: ', round(desired_trials(avg_runtime))))
## [1] "Target trials to be 30 min: 305"
For visual search
## rt distribution
d %>%
#filter(subject %in% sample(unique(d$subject), 10)) %>%
ggplot(aes(x = search_rt)) +
geom_histogram() +
facet_wrap(~subject, scales = 'free') +
labs(
caption = 'A randomly-sampled ten participants'
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## calc proportion dropped
proportion_dropped <- function(rows_before, rows_now) {
return((rows_before - rows_now) / rows_before)
}
## gross rt threshold
rows_before <- nrow(d)
d <- d[d$search_rt < 20000,]
trim_summary <- data.frame(reason = 'Search RTs above 20s', proportion_trimmed = proportion_dropped(rows_before, nrow(d)))
rows_before <- nrow(d)
d <- d[d$choice_rt < 20000,]
trim_summary <- rbind(trim_summary, data.frame(reason = 'Choice RTs above 20s', proportion_trimmed = proportion_dropped(rows_before, nrow(d))))
d %>%
group_by(subject) %>%
summarize(accuracy = 1 - mean(error), rt = mean(search_rt)) %>%
ggplot(aes(x = rt/1000, y = accuracy, group = subject)) +
geom_hline(yintercept = .75, linetype = 'dotted') +
geom_vline(xintercept = .3, linetype = 'dotted') +
geom_point() +
labs(
x = ' Average Response Time (s)',
y = 'Average Accuracy'
) +
theme_bw()
d$selected_deck_location_num <- ifelse(d$selected_deck_location == 'right', 1, -1)
d %>%
group_by(subject) %>%
summarize(sdln = mean(selected_deck_location_num)) %>%
ggplot(aes(x = subject, y = abs(sdln))) +
geom_hline(yintercept = .9, linetype = 'dotted') +
geom_point() +
coord_flip() +
ylim(0,1) +
labs(
x = 'Subject',
y = 'Location Bias'
) +
theme_bw() +
theme(axis.text.y = element_blank(),
axis.ticks = element_blank())
## cut subjects under 75% accuracy
bad_subjects <- d %>%
group_by(subject) %>%
summarize(error = mean(error), rt = mean(search_rt), location_bias = abs(mean(d$selected_deck_location_num))) %>%
filter(error > .25 | rt < .03 | location_bias > .9) %>%
select(subject)
n_bad_subjects <- nrow(bad_subjects)
d <- d[!(d$subject %in% bad_subjects$subject),]
There were 6 subjects dropped for having less than 75% accuracy.
## remove choice rts beyond
hist(d$choice_rt)
Average choice RT across Ps
d %>%
filter(choice_rt < 2500) %>%
ggplot(aes(x = choice_rt, group = subject)) +
geom_density(alpha = .3, fill = 'steelblue') +
labs(
x = 'Choice RT (ms)',
caption = 'Choices < 2,500 ms only.'
) +
theme_bw()
Really not crazy about these subjects having sharp peaks with \(<500~ms\).
rows_before <- nrow(d)
d <- d[d$choice_rt > 300 & d$choice_rt < 10000,]
print(paste0('Proportion dropped: ', round(proportion_dropped(rows_before, nrow(d)), 3), ', ', rows_before - nrow(d), '/', nrow(d)))
## [1] "Proportion dropped: 0.103, 1110/9634"
rows_before <- nrow(d)
d <- d %>%
group_by(subject) %>%
summarize(choice_rt_m = mean(choice_rt), choice_rt_sd = sd(choice_rt)) %>%
inner_join(d) %>%
filter(choice_rt >= choice_rt_m - (2*choice_rt_sd) & choice_rt <= choice_rt_m + (2*choice_rt_sd))
## Joining, by = "subject"
print(paste0('Proportion dropped: ', round(proportion_dropped(rows_before, nrow(d)), 3), ', ', rows_before - nrow(d), '/', nrow(d)))
## [1] "Proportion dropped: 0.043, 414/9220"
Compute variables
d$ssd <- 1 - d$selectedRiskyDeck
write.csv(d, paste0(here::here(), '/experiments/analysis/', exp, '/data/', exp, '_main.csv'), row.names = FALSE)
A work by Dave Braun
dab414@lehigh.edu